# This script is part 1 of the pipeline. It:
# 1. imports EddyPro full_output, biomet and st7 statistics data
# 2. performs quality filtering, u* filtering, low frequency despiking and fetch filtering using the openeddy R package (Mauder et al., 2013)
# 3. exports the results as a *_openeddy.csv for further processing
# 
# Felix Nieberding
# 2020-08-26


# Start from an empty global environment.
rm(list = ls())
# Run the whole session in UTC; date-time conversions below that do not name a
# time zone rely on this setting.
Sys.setenv(TZ = "UTC")

library(tidyverse)
library(lubridate)
library(data.table)
library(openeddy)
library(REddyProc)
library(cowplot)

# select the respective dataset
dataset <- "ORG"
# dataset <- "WLG"

# Import data -------------------------------------------------------------
# import EddyPro full_output files
# The working directory is switched to the EddyPro output folder of the
# selected dataset; the file lookup below is relative to it.
setwd(dir = paste0("~/TransTiP/_NamCo_sync/2_data_processed/Flux data/NAMORS_ECNM/processing 2/20200807/output_", dataset))

# list.files() takes a regular expression, not a glob: the dot is escaped and
# the match is anchored to the end of the file name.
full_output_files <- list.files(pattern = "full_output_\\d{4}-\\d{2}-\\d{2}T\\d{6}_adv\\.csv$")
if (length(full_output_files) == 0) {
  stop("No EddyPro full_output files found in ", getwd(), call. = FALSE)
}

# Read all files, stack them row-wise and put them on a gap-free half-hourly
# time axis. "-9999" is read as NA.
# NOTE(review): colClasses receives a readr column specification although
# read_eddy() is an openeddy function -- confirm this has the intended effect.
df_full_output <-
  do.call(rbind, lapply(full_output_files,
                        function(x) read_eddy(x, header = TRUE, skip = 1, units = TRUE, units_fill = "-", colClasses = cols(.default = col_guess()),
                                              stringsAsFactors = FALSE, encoding = "UTF-8", check_input = FALSE, na.strings = "-9999"))) %>%
  mutate(DATETIME = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M")) %>%
  # left-join onto a complete 30 min sequence so that missing half hours show
  # up as rows filled with NA
  left_join(data.frame(DATETIME = seq.POSIXt(min(.$DATETIME), max(.$DATETIME), by = "30 min")), ., by = "DATETIME") %>%
  # rebuild date and time from DATETIME so the inserted rows carry them too
  mutate(date = as_date(DATETIME),
         time = format(DATETIME, format = "%H:%M"))

# import EddyPro biomet files
# list.files() takes a regular expression, not a glob: the dot is escaped and
# the match is anchored to the end of the file name.
biomet_files <- list.files(pattern = "biomet_\\d{4}-\\d{2}-\\d{2}T\\d{6}_adv\\.csv$")
if (length(biomet_files) == 0) {
  stop("No EddyPro biomet files found in ", getwd(), call. = FALSE)
}

# Read all files, stack them row-wise, keep DATETIME as the only time column
# and put the data on a gap-free half-hourly time axis. "-9999" is read as NA.
# NOTE(review): colClasses receives a readr column specification although
# read_eddy() is an openeddy function -- confirm this has the intended effect.
df_biomet <- do.call(rbind, lapply(biomet_files,
                                   function(x) read_eddy(x, header = TRUE, units = TRUE, units_fill = "-", colClasses = cols(.default = col_guess()),
                                                         stringsAsFactors = FALSE, encoding = "UTF-8", check_input = FALSE, na.strings = "-9999"))) %>%
  mutate(DATETIME = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M")) %>%
  select(-date, -time, -DOY) %>%
  # missing half hours become rows filled with NA
  left_join(data.frame(DATETIME = seq.POSIXt(min(.$DATETIME), max(.$DATETIME), by = "30 min")), ., by = "DATETIME")
  

# import EddyPro stat7 files
# The st7 files are read from the eddypro_stats sub-folder of the current
# working directory.
setwd(dir = "./eddypro_stats/")

# list.files() takes a regular expression, not a glob: the dot is escaped and
# the match is anchored to the end of the file name.
stat7_files <- list.files(pattern = "st7_\\d{4}-\\d{2}-\\d{2}T\\d{6}_adv\\.csv$")
if (length(stat7_files) == 0) {
  stop("No EddyPro st7 files found in ", getwd(), call. = FALSE)
}

# Read all files, stack them row-wise, keep only the mean ts / co2 / h2o
# columns and put them on a gap-free half-hourly time axis. "-9999" is read as NA.
# NOTE(review): colClasses receives a readr column specification although
# read_eddy() is an openeddy function -- confirm this has the intended effect.
df_stat7 <- do.call(rbind, lapply(stat7_files,
                                  function(x) read_eddy(x, header = TRUE, units = TRUE, units_fill = "-", colClasses = cols(.default = col_guess()), skip = 1,
                                                        stringsAsFactors = FALSE, encoding = "UTF-8", check_input = FALSE, na.strings = "-9999"))) %>%
  mutate(DATETIME = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M")) %>%
  select(DATETIME, mean.ts., mean.co2., mean.h2o.) %>%
  # missing half hours become rows filled with NA
  left_join(data.frame(DATETIME = seq.POSIXt(min(.$DATETIME), max(.$DATETIME), by = "30 min")), ., by = "DATETIME")
  
# Merge the three imported tables on DATETIME (full_output is the left-hand
# table, so its rows define the result) and coerce the data columns to numeric.
# Excluded from the coercion: filename, date, time, DATETIME and every column
# whose name ends in "hf" or "sf".
df_ECNM <- df_full_output %>%
  left_join(df_biomet, by = "DATETIME") %>%
  left_join(df_stat7, by = "DATETIME") %>%
  mutate_at(vars(-filename, -date, -time, -DATETIME, -ends_with("hf"), -ends_with("sf")),
            as.numeric)

# From here on all relative paths refer to the project folder.
setwd(dir = "~/TransTiP/_NamCo_sync/3_data_qa-qc/Flux data/ESSD_scripts_review/")

# calculate Li-7500 h2o and co2 concentrations --------------------------------------
# The st7 mean of h2o is used as the Li-7500 molar density. The mole fraction
# is molar density times air molar volume; the mixing ratio divides the mole
# fraction by (1 - mole fraction * 0.001).
# NOTE(review): the 0.001 factor presumably converts mmol/mol to mol/mol -- confirm units.
df_ECNM <- df_ECNM %>%
  mutate(h2o_molar_density_Li7500 = mean.h2o.,
         h2o_mole_fraction_Li7500 = h2o_molar_density_Li7500 * air_molar_volume,
         h2o_mixing_ratio_Li7500 = h2o_mole_fraction_Li7500 / (1 - h2o_mole_fraction_Li7500 * 0.001))

# The corresponding co2 quantities are currently disabled:
# df_ECNM$co2_molar_density_Li7500 <- df_ECNM$mean.co2.
# df_ECNM$co2_mole_fraction_Li7500 <- df_ECNM$co2_molar_density_Li7500 * df_ECNM$air_molar_volume * 1000
# df_ECNM$co2_mixing_ratio_Li7500 <- df_ECNM$co2_molar_density_Li7500 * df_ECNM$air_molar_volume * 1000 / (1 - df_ECNM$h2o_molar_density_Li7500 * df_ECNM$air_molar_volume * 0.001)


# discard wrong Li7500 data from years 2006, 2012 and 2018 and format for use in openeddy
# TRUE for every half hour that lies strictly inside one of the three rejected
# periods. Computed once up front and reused for all affected columns.
li7500_bad_period <- with(
  df_ECNM,
  (DATETIME > "2012-01-30 02:00" & DATETIME < "2012-08-31 15:00") |
    (DATETIME > "2018-06-01 00:00" & DATETIME < "2018-06-30 23:30") |
    (DATETIME > "2006-08-01 00:00" & DATETIME < "2006-09-01 23:30")
)

df_ECNM <- df_ECNM %>%
  # set gas-analyser based fluxes and concentrations to NA inside the rejected
  # periods (anonymous function: nothing is assigned in the global environment,
  # so base::remove() is not masked)
  mutate_at(.vars = c("co2_flux", "h2o_flux", "LE",
                      "co2_molar_density", "h2o_molar_density", "h2o_molar_density_Li7500",
                      "co2_mixing_ratio", "h2o_mixing_ratio", "h2o_mixing_ratio_Li7500"),
            .funs = function(x) ifelse(li7500_bad_period, NA, x)) %>%
  # make sure the columns handed to openeddy are numeric
  mutate_at(.vars = c("co2_flux", "h2o_flux", "H", "LE",
                      "qc_co2_flux", "qc_h2o_flux",
                      "h2o_molar_density", "h2o_molar_density_Li7500", "co2_molar_density",
                      "h2o_mixing_ratio", "h2o_mixing_ratio_Li7500", "co2_mixing_ratio"), as.numeric)

# generate flag for wind direction filtering from disturbed sector
# TRUE where the wind direction lies outside the closed sector [from, to].
outside_sector <- function(wd, from, to) wd < from | wd > to

# qc_wind_dir is 0 when the wind direction is outside both rejected sectors
# valid for the respective period and 2 otherwise. Records with a missing wind
# direction match no case and therefore also receive 2. DATETIME is renamed to
# timestamp afterwards.
df_ECNM <- df_ECNM %>%
  mutate(qc_wind_dir = case_when(
    DATETIME <= "2009-06-30 12:30" &
      outside_sector(wind_dir, 260, 280) & outside_sector(wind_dir, 305, 325) ~ 0,
    DATETIME > "2009-06-30 12:30" & DATETIME <= "2010-01-30 16:30" &
      outside_sector(wind_dir, 260, 280) & outside_sector(wind_dir, 10, 50) ~ 0,
    DATETIME > "2010-01-30 16:30" & DATETIME <= "2011-12-31 23:30" &
      outside_sector(wind_dir, 250, 300) & outside_sector(wind_dir, 10, 50) ~ 0,
    DATETIME > "2011-12-31 23:30" & DATETIME <= "2018-12-31 23:30" &
      outside_sector(wind_dir, 250, 315) & outside_sector(wind_dir, 10, 50) ~ 0,
    DATETIME > "2018-12-31 23:30" &
      outside_sector(wind_dir, 245, 315) & outside_sector(wind_dir, 10, 50) ~ 0,
    TRUE ~ 2)) %>%
  rename(timestamp = DATETIME)

# remove implausible values from NEE, VPD and air temperature
# Replace values below `lower` or above `upper` by NA; values equal to a bound
# are kept.
na_outside <- function(x, lower = -Inf, upper = Inf) {
  ifelse(x < lower | x > upper, NA, x)
}

df_ECNM <- df_ECNM %>%
  mutate(NEE = na_outside(co2_flux, lower = -50, upper = 100), # NEE is co2_flux limited to -50 ... 100
         VPD = na_outside(VPD, lower = 0),                     # no negative VPD
         air_temperature = na_outside(air_temperature, upper = 303.15)) # upper limit 303.15 (presumably K, i.e. 30 degC)

# u* filtering ------------------------------------------------------------
# Assemble the input table for REddyProc from the columns of df_ECNM.
eddy_input <- transmute(
  df_ECNM,
  DateTime = timestamp,
  # discard NEE with quality flag 2 or outside -50 ... 100
  NEE = ifelse(qc_co2_flux == 2 | co2_flux < -50 | co2_flux > 100, NA, co2_flux),
  # use Rg where it is available and fall back to SWin where Rg is missing
  Rg = ifelse(!is.na(Rg), Rg, SWin),
  # NOTE(review): presumably Kelvin to degC -- confirm
  Tair = air_temperature - 273.15,
  rH = RH.x,
  VPD = VPD / 100, # VPD in hPa
  Ustar = u.
)
EddyData <- filterLongRuns(eddy_input, "NEE")


# Initialize the sEddyProc reference class for post-processing of eddy data
EProc <- sEddyProc$new('ECNM', EddyData, c('NEE', 'Rg', 'Tair', 'VPD', 'Ustar', "rH"))

# Estimate the distribution of the Ustar threshold
Ustar_Thr_dist <- EProc$sEstUstarThold()

# Take the threshold of aggregation mode "single", rounded to two decimals
Ustar_Thr <- Ustar_Thr_dist %>%
  filter(aggregationMode == "single") %>%
  pull(uStar) %>%
  unname() %>%
  round(2)

# generate flag for ustar filtering: 2 below the threshold, 0 otherwise
df_ECNM$qc_ustar <- ifelse(df_ECNM[["u."]] < Ustar_Thr, 2, 0)

# openeddy ----------------------------------------------------------------
# repeating values as a sign of malfunctioning equipment ------------------
# Names: runs-flag column to create; values: column the flag is derived from.
runs_flag_source <- c(
  qc_H_runs = "H",
  qc_LE_runs = "LE",
  qc_NEE_runs = "co2_flux",
  qc_h2o_runs = "h2o_flux",
  qc_co2_molar_density_runs = "co2_molar_density",
  qc_h2o_molar_density_runs = "h2o_molar_density",
  qc_h2o_molar_density_Li7500_runs = "h2o_molar_density_Li7500",
  qc_co2_mixing_ratio_runs = "co2_mixing_ratio",
  qc_h2o_mixing_ratio_runs = "h2o_mixing_ratio",
  qc_h2o_mixing_ratio_Li7500_runs = "h2o_mixing_ratio_Li7500"
)

for (flag_name in names(runs_flag_source)) {
  source_col <- runs_flag_source[[flag_name]]
  df_ECNM[[flag_name]] <- flag_runs(df_ECNM[[source_col]], flag_name)
  # show how many records received each flag value
  print(table(df_ECNM[[flag_name]]))
}

# extract hard flags ------------------------------------------------------
# skewness and kurtosis
# Decode the coded skewness_kurtosis_hf column; the SA and SA_IRGA columns of
# the result are renamed to qc_* flag columns and appended to df_ECNM.
skw_kur_hf <- extract_coded(df_ECNM$skewness_kurtosis_hf, prefix = "[8]", split = "[/]")
skw_kur_hf <- rename(skw_kur_hf,
                     qc_SA_skw_kur_HF = SA,
                     qc_SA_IRGA_skw_kur_HF = SA_IRGA)

df_ECNM <- cbind(df_ECNM, skw_kur_hf)

# combine all qc flags to preliminary composite where necessary
# Names: preliminary composite column to create; values: flag columns combined
# into it. All four fluxes share the wind direction and ustar flags.
prelim_flag_sets <- list(
  qc_H_prelim   = c("qc_H", "qc_H_runs", "qc_SA_skw_kur_HF", "qc_wind_dir", "qc_ustar"),
  qc_LE_prelim  = c("qc_LE", "qc_LE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar"),
  qc_NEE_prelim = c("qc_co2_flux", "qc_NEE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar"),
  qc_h2o_prelim = c("qc_h2o_flux", "qc_h2o_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar")
)

for (prelim_name in names(prelim_flag_sets)) {
  df_ECNM[[prelim_name]] <-
    combn_QC(df_ECNM, prelim_flag_sets[[prelim_name]],
             prelim_name, additive = FALSE, na.as = NA)
}

# use preliminary composite for despiking ---------------------------------
# One entry per despiked variable: the variable, the qc flag column handed to
# despikeLF(), the name of the resulting flag column and the var_thr range.
# Fluxes use their preliminary composite, concentrations their runs flag.
despike_jobs <- list(
  list(var = "co2_flux", qc = "qc_NEE_prelim", out = "qc_NEE_despikeLF", thr = c(-50, 50)),
  list(var = "h2o_flux", qc = "qc_h2o_prelim", out = "qc_h2o_despikeLF", thr = c(-50, 50)),
  list(var = "H", qc = "qc_H_prelim", out = "qc_H_despikeLF", thr = c(-300, 600)),
  list(var = "LE", qc = "qc_LE_prelim", out = "qc_LE_despikeLF", thr = c(-300, 600)),
  list(var = "co2_molar_density", qc = "qc_co2_molar_density_runs",
       out = "qc_co2_molar_density_despikeLF", thr = c(0, 40)),
  list(var = "h2o_molar_density", qc = "qc_h2o_molar_density_runs",
       out = "qc_h2o_molar_density_despikeLF", thr = c(0, 1000)),
  list(var = "h2o_molar_density_Li7500", qc = "qc_h2o_molar_density_Li7500_runs",
       out = "qc_h2o_molar_density_Li7500_despikeLF", thr = c(0, 1000)),
  list(var = "co2_mixing_ratio", qc = "qc_co2_mixing_ratio_runs",
       out = "qc_co2_mixing_ratio_despikeLF", thr = c(100, 800)),
  list(var = "h2o_mixing_ratio", qc = "qc_h2o_mixing_ratio_runs",
       out = "qc_h2o_mixing_ratio_despikeLF", thr = c(0, 40)),
  list(var = "h2o_mixing_ratio_Li7500", qc = "qc_h2o_mixing_ratio_Li7500_runs",
       out = "qc_h2o_mixing_ratio_Li7500_despikeLF", thr = c(0, 40))
)

# The jobs run in the listed order; each result is stored as a new column.
for (job in despike_jobs) {
  df_ECNM[[job$out]] <-
    despikeLF(df_ECNM, job$var, job$qc, job$out, var_thr = job$thr, light = NULL)
}

# combine hard flags to QC composite  -------------------------------------
# One entry per composite: name of the composite column, the flag columns that
# go into it and the matching na.as vector (one element per flag column).
# The order matters: the concentration composites come first because the NEE
# and h2o flux composites include one of them.
composite_jobs <- list(
  ## concentrations:
  # molar density
  list(out = "qc_co2_molar_density_composite",
       flags = c("qc_co2_molar_density_runs", "qc_co2_molar_density_despikeLF"),
       na_as = c(NA, 0)),
  list(out = "qc_h2o_molar_density_composite",
       flags = c("qc_h2o_molar_density_runs", "qc_h2o_molar_density_despikeLF"),
       na_as = c(NA, 0)),
  list(out = "qc_h2o_molar_density_Li7500_composite",
       flags = c("qc_h2o_molar_density_Li7500_runs", "qc_h2o_molar_density_Li7500_despikeLF"),
       na_as = c(NA, 0)),
  # mixing ratio
  list(out = "qc_co2_mixing_ratio_composite",
       flags = c("qc_co2_mixing_ratio_runs", "qc_co2_mixing_ratio_despikeLF"),
       na_as = c(NA, 0)),
  list(out = "qc_h2o_mixing_ratio_composite",
       flags = c("qc_h2o_mixing_ratio_runs", "qc_h2o_mixing_ratio_despikeLF"),
       na_as = c(NA, 0)),
  list(out = "qc_h2o_mixing_ratio_Li7500_composite",
       flags = c("qc_h2o_mixing_ratio_Li7500_runs", "qc_h2o_mixing_ratio_Li7500_despikeLF"),
       na_as = c(NA, 0)),
  ## fluxes:
  # H
  list(out = "qc_H_composite",
       flags = c("qc_H", "qc_H_runs", "qc_SA_skw_kur_HF", "qc_wind_dir", "qc_ustar",
                 "qc_H_prelim", "qc_H_despikeLF"),
       na_as = c(NA, NA, NA, NA, NA, NA, 0)),
  # LE
  list(out = "qc_LE_composite",
       flags = c("qc_LE", "qc_LE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar",
                 "qc_LE_prelim", "qc_LE_despikeLF"),
       na_as = c(NA, NA, NA, NA, NA, NA, 0)),
  # FCO2
  list(out = "qc_NEE_composite",
       flags = c("qc_co2_flux", "qc_NEE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar",
                 "qc_NEE_prelim", "qc_NEE_despikeLF", "qc_co2_molar_density_composite"),
       na_as = c(NA, NA, NA, NA, NA, NA, 0, NA)),
  # FH2O
  list(out = "qc_h2o_composite",
       flags = c("qc_h2o_flux", "qc_h2o_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar",
                 "qc_h2o_prelim", "qc_h2o_despikeLF", "qc_h2o_molar_density_Li7500_composite"),
       na_as = c(NA, NA, NA, NA, NA, NA, 0, NA))
)

for (job in composite_jobs) {
  df_ECNM[[job$out]] <-
    combn_QC(df_ECNM, job$flags, job$out, additive = FALSE, na.as = job$na_as)
}


# plots
# Build one cumulative QC summary panel for the given flag columns of df_ECNM;
# the x axis title is removed from the panel.
qc_summary_panel <- function(flag_cols, na_as, flux_name) {
  summary_QC(df_ECNM, flag_cols,
             additive = FALSE, na.as = na_as, cumul = TRUE, plot = TRUE, flux = flux_name) +
    theme(axis.title.x = element_blank())
}

p1 <- qc_summary_panel(
  c("qc_H", "qc_H_runs", "qc_SA_skw_kur_HF", "qc_wind_dir", "qc_ustar", "qc_H_prelim", "qc_H_despikeLF"),
  c(NA, NA, NA, NA, NA, NA, 0),
  "H")

p2 <- qc_summary_panel(
  c("qc_LE", "qc_LE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar", "qc_LE_prelim", "qc_LE_despikeLF"),
  c(NA, NA, NA, NA, NA, NA, 0),
  "LE")

p3 <- qc_summary_panel(
  c("qc_co2_flux", "qc_NEE_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar", "qc_NEE_prelim", "qc_NEE_despikeLF", "qc_co2_molar_density_composite"),
  c(NA, NA, NA, NA, NA, NA, 0, NA),
  "co2_flux")

p4 <- qc_summary_panel(
  c("qc_h2o_flux", "qc_h2o_runs", "qc_SA_IRGA_skw_kur_HF", "qc_wind_dir", "qc_ustar", "qc_h2o_prelim", "qc_h2o_despikeLF", "qc_h2o_molar_density_Li7500_composite"),
  c(NA, NA, NA, NA, NA, NA, 0, NA),
  "h2o_flux")

# arrange the four panels in a 2 x 2 grid, show it and save it to the working directory
p_QC <- plot_grid(p1, p2, p3, p4, ncol = 2)
p_QC
ggsave(paste0("QC_plot_", dataset, ".png"), plot = p_QC, dpi = 300)

# export results ----------------------------------------------------------
# The time stamp column gets its original name DATETIME back and is written as
# text; the file name contains the selected dataset.
export_file <- paste0("df_", dataset, "_openeddy.csv")
export_table <- df_ECNM %>%
  rename(DATETIME = timestamp) %>%
  mutate(DATETIME = as.character(DATETIME))
fwrite(export_table, file = export_file)



